	
clear
set more off
cap log close

***************************************************************************************************
* Program: figure4.do
* Purpose: Produce Figure 4A and 4B.
***************************************************************************************************

************************************************************************
** set macros (directory)
************************************************************************
global prison_dir "~/Dropbox/Prison_Covid/PNAS_Nexus_Replication"
global raw_data "${prison_dir}/raw_data"
global intermediate_data "${prison_dir}/intermediate_data"
global code "${prison_dir}/code"
global analysis_data "${prison_dir}/analysis_data"
global output "${prison_dir}/output"

* Variables 
global demo "pct_nh_blk_alone_acs_15_19 pct_nh_white_alone_acs_15_19 pct_hisp_latino_acs_15_19 pct_college_acs_15_19 med_hhc_inc_acs_15_19 pct_pop_65plus_acs_15_19 log_tot_pop_acs_15_19"
global cubic "ZCTA_max_cases_p100k_rmpri5 ZCTA_max_cases_p100k_rmpri5_sq ZCTA_max_cases_p100k_rmpri5_cu"

global covidXwhite "cum_cases5_X_pct_white cases_5_X_pct_white cases_sq_5_X_pct_white cases_cu_5_X_pct_white"
global covidXinc "cum_cases5_X_inc cases_5_X_inc cases_sq_5_X_inc cases_cu_5_X_inc"	


*********************************************************************************
* Figure 4A: 
* Plot mean case rate by treatment, SE calculated using delta method 
*********************************************************************************
	
* needs to create "prison_treated_data_for_bs.dta"
use "${analysis_data}/San_Quentin_main_analysis_data.dta", clear
gunique ZCTA_contact if treated == 0
global N_ctrl = `r(unique)'
display "$N_ctrl"

keep if treated == 1 
egen ZCTA_id = group(ZCTA_contact)
replace ZCTA_id = ZCTA_id + $N_ctrl

save "${intermediate_data}/prison_treated_data_for_bs.dta", replace 


// Number of bootstraps
cd "${intermediate_data}/"
local B = 1000

// Load Xbar_t, Var(Xbar_t), ATT_t, and Var(ATT_t) values
use "${analysis_data}/San_Quentin_main_analysis_data.dta", clear
csdid ZCTA_max_cases_p100k_rmpri $cubic cum_max_cases_p100k_rmpri5 $demo, ivar(ZCTA_contact) time(month) gvar(first_treated) method(dripw) long
gunique ZCTA_contact if treated == 0
global N_ctrl = `r(unique)'
display "$N_ctrl"
matrix ATT = (e(b)[1,1], e(b)[1,2], 0, e(b)[1,3], e(b)[1,4], e(b)[1,5], e(b)[1,6], e(b)[1,7])
matrix VarATT = (e(V)[1,1], e(V)[2,2], 0, e(V)[3,3], e(V)[4,4], e(V)[5,5], e(V)[6,6], e(V)[7,7])

// Initialize matrices to store Xbar_t and ATT_t in each bootstrap iteration
matrix Xbar_bootstrap = J(8, `B', .)
matrix VarXbar_bootstrap = J(8, `B', .)
matrix ATT_bootstrap = J(8, `B', .)

// Bootstrap loop
forvalues i = 1/`B' {
	
	preserve 
	
		// Sample control zip codes with replacement
		qui use "${analysis_data}/San_Quentin_main_analysis_data.dta", clear
		qui bsample $N_ctrl if treated == 0, cluster(ZCTA_contact) idcluster(ZCTA_id)

		// Append treated zip codes
		append using "${intermediate_data}/prison_treated_data_for_bs.dta"
		
		// Estimate ATT
		qui csdid ZCTA_max_cases_p100k_rmpri $cubic cum_max_cases_p100k_rmpri5 $demo, ivar(ZCTA_id) time(month) gvar(first_treated) method(dripw) long
		matrix results = (e(b)[1,1], e(b)[1,2], 0, e(b)[1,3], e(b)[1,4], e(b)[1,5], e(b)[1,6], e(b)[1,7])

		qui drdid ZCTA_max_cases_p100k_rmpri $cubic cum_max_cases_p100k_rmpri5 $demo if inlist(month, 5, 6), ///
			ivar(ZCTA_id) time(month) tr(treated) dripw
		cap drop wgt_demo wgt_demo_temp 
		drdid_predict wgt_demo_temp 
		gegen wgt_demo = max(wgt_demo_temp), by(ZCTA_id)
				
		// Compute covariance between Xbar_t and ATT_t for each month
		// Store Xbar_t and ATT_t in each bootstrap iteration
		forvalues j = 1/8{
			local m = `j' + 2
			qui sum ZCTA_max_cases_p100k_rmpri [w = wgt_demo] if month == `m' & treated == 0
			matrix Xbar_bootstrap[`j', `i'] = r(mean)
			matrix VarXbar_bootstrap[`j', `i'] = r(Var)
			matrix ATT_bootstrap[`j', `i'] = results[1, `j']
		}
    
    restore
}

// Compute mean of Xbar_t and Var(Xbar_t) from bootstrap samples
matrix Xbar = J(1, 8, .)
matrix VarXbar = J(1, 8, .)
forvalues j = 1/8 {
	
	// calculate mean of Xbar_t for month m = `j' + 2
    matrix col = Xbar_bootstrap[`j', 1..`B']
	matrix col = col'
    matrix mean = (J(1, `B', 1) * col) / `B'
	// repeat mean for B times 
	matrix m_mean = J(1, `B', .)
	forval k = 1/`B'{
		matrix m_mean[1, `k'] = mean[1, 1]
	}
	// save mean for month m to Xbar 
    matrix Xbar[1, `j'] = mean[1, 1]
	// calculate mean center value 
	matrix mc = col' - m_mean
	// calculate variance of Xbar 
    matrix variance = (mc * mc') / (`B' - 1)
	// save variance for month m to the matrix 
    matrix VarXbar[1, `j'] = variance[1, 1]
}

// Compute Cov(Xbar_t, ATT_t) from bootstrap samples
matrix ATTbar = J(1, 8, .)
matrix CovXbarATT = J(1, 8, .)
forvalues j = 1/8 {
    matrix xbar_col = Xbar_bootstrap[`j', 1..`B']
    matrix att_col = ATT_bootstrap[`j', 1..`B']
	matrix mean_x = J(1, `B', .)
	matrix mean_att = J(1, `B', .)
	// note: need to create ATTbar (i.e. mean of ATT boostrap) 
    matrix att_mean = (J(1, `B', 1) * att_col') / `B'
	matrix ATTbar[1, `j'] = att_mean[1, 1]
	forval k = 1/`B'{
		matrix mean_x[1, `k'] = Xbar[1, `j'] // used estimated mean of ATT_bootstrap instead of actual Xbar
		matrix mean_att[1, `k'] = ATTbar[1, `j'] // used estimated mean of ATT_bootstrap instead of actual ATT 
	}
    matrix covariance = ((xbar_col - mean_x) * (att_col - mean_att)') / `B'
    matrix CovXbarATT[1, `j'] = covariance[1, 1]
}


// Compute Var(yt) and SE(yt)
matrix Varyt = VarXbar + VarATT + 2 * CovXbarATT
matrix SEyt = (sqrt(Varyt[1,1]), sqrt(Varyt[1,2]), sqrt(Varyt[1,3]), sqrt(Varyt[1,4]), sqrt(Varyt[1,5]), sqrt(Varyt[1,6]), sqrt(Varyt[1,7]), sqrt(Varyt[1,8]))
matrix SExbar = (sqrt(VarXbar[1,1]), sqrt(VarXbar[1,2]), sqrt(VarXbar[1,3]), sqrt(VarXbar[1,4]), sqrt(VarXbar[1,5]), sqrt(VarXbar[1,6]), sqrt(VarXbar[1,7]), sqrt(VarXbar[1,8]))

// Compute y_t and 95% Confidence Intervals for y_t
matrix yt = Xbar + ATT

// save mean and se of outcome (covid cases) for treated vs. Control Zip Codes into a big matrix 
matrix result_delta_method_demo = (yt \ SEyt \ Xbar \ SExbar)

// save it to dataset 
matsave result_delta_method_demo, replace  dropall

// read data and transpose the matrix 
use result_delta_method_demo, clear 
xpose, clear varname
drop if _varname == "_rowname"

rename v1 y_1 
rename v2 se_1 
rename v3 y_0
rename v4 se_0

split _varname, parse("c")
rename _varname2 month 
destring month, replace 
replace month = month + 2
drop _varname* 

reshape long y_ se_, i(month) j(treated)
rename y_ m_ZCTA_cases_demo
	
// get 95 confidence interval 	
gen lb_ZCTA_cases_demo = m_ZCTA_cases_demo - 1.96 * se_ 
gen ub_ZCTA_cases_demo = m_ZCTA_cases_demo + 1.96 * se_ 

save "${intermediate_data}/predictions_delta_method_smartphone.dta", replace 

// plot
use  "${intermediate_data}/predictions_delta_method_smartphone.dta", clear
sort month treated 
twoway connected m_ZCTA_cases_demo month if treated == 1 & month <= 10, lcolor(navy) mcolor(navy) ///
||     connected m_ZCTA_cases_demo month if treated == 0 & month <= 10, lcolor(maroon) mcolor(maroon) ///
||     rcap ub_ZCTA_cases_demo lb_ZCTA_cases_demo month if treated == 1 & month <= 10, lcolor(navy) ///
||     rcap ub_ZCTA_cases_demo lb_ZCTA_cases_demo month if treated == 0 & month <= 10, lcolor(maroon) ///
  legend(order(1 "Treated Zip Codes" 2 "Control Zip Codes") rows(1) size(medium)) sort ///
  title("Zip Code Cases per 100k" "Matched on COVID-19 and Demographic Variables") xline(5, lcolor(gs8) lpattern(dash)) ///
  xtitle("Month") xlabel(3 "March" 4 "April" 5 "May" 6 "June" 7 "July" 8 "August" 9 "September" 10 "October")
	  
graph export "${output}/figure4a.png", replace  	  

	
*********************************************************************************
* Figure 4B: Effect plot
*********************************************************************************
	
use "${analysis_data}/San_Quentin_main_analysis_data.dta", clear  

csdid ZCTA_max_cases_p100k_rmpri $cubic cum_max_cases_p100k_rmpri5 $demo , ivar(ZCTA_contact) time(month) gvar(first_treated) method(dripw) long
qui estat event, estore(m1)

csdid ZCTA_max_cases_p100k_rmpri $cubic cum_max_cases_p100k_rmpri5 $demo , ivar(ZCTA_contact) time(month) gvar(first_treated_lodes) method(dripw) long
qui estat event, estore(m2)

event_plot m1 m2, noautolegend ///
stub_lag(Tp#) stub_lead(Tm#) together plottype(scatter) ciplottype(rcap) ///
	graph_opt(xtitle("Month") ytitle("Average Treatment Effect") ///
	legend(order(2 "Smartphone" 4 "LODES"))  ///
	note("Treated: Zip Codes Connected to San Quentin State Prison in June 2020" "Control: Zip Codes Unconnected to San Quentin State Prison in June 2020", size(small)) ///
	xlabel(-2 "March" -1 "April" 0 "June" 1 "July" 2 "August" 3 "September" 4 "October") ///
	title("Callaway and Sant'Anna DRIPW Estimator" "Matched on COVID-19 and Demographic Variables")  /// 
	xline(-0.5, lcolor(gs8) lpattern(dash)) yline(0, lcolor(gs8)) graphregion(color(white)) bgcolor(white) ylabel(, angle(horizontal))) ///
	lag_opt1(msymbol(+) color(cranberry)) lag_ci_opt1(color(cranberry) )
graph export "${output}/figure4b.png", replace 
